I import the libraries.
require(data.table)
## Loading required package: data.table
require(lubridate)
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
require(forecast)
## Loading required package: forecast
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
require(skimr)
## Loading required package: skimr
require(repr)
## Loading required package: repr
require(openxlsx)
## Loading required package: openxlsx
require(ggplot2)
## Loading required package: ggplot2
require(data.table)
require(skimr)
require(GGally)
## Loading required package: GGally
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
require(ggcorrplot)
## Loading required package: ggcorrplot
require(tidyverse)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ stringr 1.5.1
## ✔ forcats 1.0.0 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between() masks data.table::between()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first()
## ✖ lubridate::hour() masks data.table::hour()
## ✖ lubridate::isoweek() masks data.table::isoweek()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ lubridate::mday() masks data.table::mday()
## ✖ lubridate::minute() masks data.table::minute()
## ✖ lubridate::month() masks data.table::month()
## ✖ lubridate::quarter() masks data.table::quarter()
## ✖ lubridate::second() masks data.table::second()
## ✖ purrr::transpose() masks data.table::transpose()
## ✖ lubridate::wday() masks data.table::wday()
## ✖ lubridate::week() masks data.table::week()
## ✖ lubridate::yday() masks data.table::yday()
## ✖ lubridate::year() masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
I define the plot sizes.
# Default width and height for plots rendered through repr.
options(
  repr.plot.width = 12,
  repr.plot.height = 8
)
I import the data that I will use and convert the columns to the proper data types so that I can work with them.
# Load the EVDS export that holds the three target series and preview it.
data_path <- "/Users/serhatekli/Desktop/23-24\ spring/IE360/EVDS-28.xlsx"
data <- read.xlsx(data_path)
head(data)
## Tarih TP.AKONUTSAT1.TOPLAM TP.YISGUCU2.G8 TP.TG2.Y17
## 1 2014-01 87639 10.5 15.11225
## 2 2014-02 82597 10.5 11.43362
## 3 2014-03 87617 9.4 11.33385
## 4 2014-04 83610 8.8 12.54449
## 5 2014-05 90377 8.4 15.56560
## 6 2014-06 92936 9 11.88313
str(data)
## 'data.frame': 146 obs. of 4 variables:
## $ Tarih : chr "2014-01" "2014-02" "2014-03" "2014-04" ...
## $ TP.AKONUTSAT1.TOPLAM: chr "87639" "82597" "87617" "83610" ...
## $ TP.YISGUCU2.G8 : chr "10.5" "10.5" "9.4" "8.8" ...
## $ TP.TG2.Y17 : num 15.1 11.4 11.3 12.5 15.6 ...
# Parse the "YYYY-MM" strings into Dates; entries that fail to parse become NA.
data[["Tarih"]] <- ym(data[["Tarih"]])
## Warning: 8 failed to parse.
# Coerce the value columns to numeric; non-numeric cells become NA.
data[["TP.YISGUCU2.G8"]] <- as.numeric(data[["TP.YISGUCU2.G8"]])
## Warning: NAs introduced by coercion
data[["TP.AKONUTSAT1.TOPLAM"]] <- as.numeric(data[["TP.AKONUTSAT1.TOPLAM"]])
## Warning: NAs introduced by coercion
data[["TP.TG2.Y17"]] <- as.numeric(data[["TP.TG2.Y17"]])
str(data)
## 'data.frame': 146 obs. of 4 variables:
## $ Tarih : Date, format: "2014-01-01" "2014-02-01" ...
## $ TP.AKONUTSAT1.TOPLAM: num 87639 82597 87617 83610 90377 ...
## $ TP.YISGUCU2.G8 : num 10.5 10.5 9.4 8.8 8.4 9 9.7 10.1 10.1 10.9 ...
## $ TP.TG2.Y17 : num 15.1 11.4 11.3 12.5 15.6 ...
# Keep only the rows in which every column was parsed successfully.
data <- na.omit(data)
# Load the EVDS export with the regressors used for the house-sales series
# (TP.DK.EUR.C.YTL and TP.KTF12) and preview it.
data_path <- "/Users/serhatekli/Desktop/23-24\ spring/IE360/EVDS-29.xlsx"
konut_satis <- read.xlsx(data_path)
head(konut_satis)
## Tarih TP.DK.EUR.C.YTL TP.KTF12
## 1 2014-01 1.3630090909091 11.3
## 2 2014-02 1.364635 13.03
## 3 2014-03 1.3823476190476 13.505
## 4 2014-04 1.3811142857143 13.52
## 5 2014-05 1.374815 12.944
## 6 2014-06 1.3592333333333 12.2875
str(konut_satis)
## 'data.frame': 134 obs. of 3 variables:
## $ Tarih : chr "2014-01" "2014-02" "2014-03" "2014-04" ...
## $ TP.DK.EUR.C.YTL: chr "1.3630090909091" "1.364635" "1.3823476190476" "1.3811142857143" ...
## $ TP.KTF12 : chr "11.3" "13.03" "13.505" "13.52" ...
# Parse the "YYYY-MM" strings into Dates; entries that fail to parse become NA.
konut_satis[["Tarih"]] <- ym(konut_satis[["Tarih"]])
## Warning: 6 failed to parse.
# Coerce the value columns to numeric; non-numeric cells become NA.
konut_satis[["TP.DK.EUR.C.YTL"]] <- as.numeric(konut_satis[["TP.DK.EUR.C.YTL"]])
## Warning: NAs introduced by coercion
konut_satis[["TP.KTF12"]] <- as.numeric(konut_satis[["TP.KTF12"]])
## Warning: NAs introduced by coercion
str(konut_satis)
## 'data.frame': 134 obs. of 3 variables:
## $ Tarih : Date, format: "2014-01-01" "2014-02-01" ...
## $ TP.DK.EUR.C.YTL: num 1.36 1.36 1.38 1.38 1.37 ...
## $ TP.KTF12 : num 11.3 13 13.5 13.5 12.9 ...
# Keep only the rows in which every column was parsed successfully.
konut_satis <- na.omit(konut_satis)
# Load the EVDS export with the regressors used for the car-purchase
# expectation series (TP.BRENTPETROL.EUBP and TP.KTF11) and preview it.
data_path <- "/Users/serhatekli/Desktop/23-24\ spring/IE360/EVDS-30.xlsx"
araba_alma_beklentisi <- read.xlsx(data_path)
head(araba_alma_beklentisi)
## Tarih TP.BRENTPETROL.EUBP TP.KTF11
## 1 2014-01 108.16 12.84
## 2 2014-02 108.98 15.0925
## 3 2014-03 105.95 15.21
## 4 2014-04 108.63 14.91
## 5 2014-05 109.21 14.288
## 6 2014-06 111.03 13.305
str(araba_alma_beklentisi)
## 'data.frame': 137 obs. of 3 variables:
## $ Tarih : chr "2014-01" "2014-02" "2014-03" "2014-04" ...
## $ TP.BRENTPETROL.EUBP: chr "108.16" "108.98" "105.95" "108.63" ...
## $ TP.KTF11 : chr "12.84" "15.0925" "15.21" "14.91" ...
# Parse the "YYYY-MM" strings into Dates; entries that fail to parse become NA.
araba_alma_beklentisi[["Tarih"]] <- ym(araba_alma_beklentisi[["Tarih"]])
## Warning: 6 failed to parse.
# Coerce the value columns to numeric; non-numeric cells become NA.
araba_alma_beklentisi[["TP.BRENTPETROL.EUBP"]] <- as.numeric(araba_alma_beklentisi[["TP.BRENTPETROL.EUBP"]])
## Warning: NAs introduced by coercion
araba_alma_beklentisi[["TP.KTF11"]] <- as.numeric(araba_alma_beklentisi[["TP.KTF11"]])
## Warning: NAs introduced by coercion
str(araba_alma_beklentisi)
## 'data.frame': 137 obs. of 3 variables:
## $ Tarih : Date, format: "2014-01-01" "2014-02-01" ...
## $ TP.BRENTPETROL.EUBP: num 108 109 106 109 109 ...
## $ TP.KTF11 : num 12.8 15.1 15.2 14.9 14.3 ...
# Keep only the rows in which every column was parsed successfully.
araba_alma_beklentisi <- na.omit(araba_alma_beklentisi)
# Load the EVDS export with the regressors used for the unemployment series
# (TP.DK.USD.C.YTL and TP.KAP2.TOP.A) and preview it.
data_path <- "/Users/serhatekli/Desktop/23-24\ spring/IE360/EVDS-31.xlsx"
issizlik_orani <- read.xlsx(data_path)
head(issizlik_orani)
## Tarih TP.DK.USD.C.YTL TP.KAP2.TOP.A
## 1 2014-01 1 2892
## 2 2014-02 1 1181
## 3 2014-03 1 1073
## 4 2014-04 1 937
## 5 2014-05 1 968
## 6 2014-06 1 1120
str(issizlik_orani)
## 'data.frame': 135 obs. of 3 variables:
## $ Tarih : chr "2014-01" "2014-02" "2014-03" "2014-04" ...
## $ TP.DK.USD.C.YTL: chr "1" "1" "1" "1" ...
## $ TP.KAP2.TOP.A : chr "2892" "1181" "1073" "937" ...
# Parse the "YYYY-MM" strings into Dates; entries that fail to parse become NA.
issizlik_orani[["Tarih"]] <- ym(issizlik_orani[["Tarih"]])
## Warning: 6 failed to parse.
# Coerce the value columns to numeric; non-numeric cells become NA.
# NOTE(review): TP.DK.USD.C.YTL shows only 1s in the previews - confirm that
# this is the intended series.
issizlik_orani[["TP.DK.USD.C.YTL"]] <- as.numeric(issizlik_orani[["TP.DK.USD.C.YTL"]])
## Warning: NAs introduced by coercion
issizlik_orani[["TP.KAP2.TOP.A"]] <- as.numeric(issizlik_orani[["TP.KAP2.TOP.A"]])
## Warning: NAs introduced by coercion
str(issizlik_orani)
## 'data.frame': 135 obs. of 3 variables:
## $ Tarih : Date, format: "2014-01-01" "2014-02-01" ...
## $ TP.DK.USD.C.YTL: num 1 1 1 1 1 1 1 1 1 1 ...
## $ TP.KAP2.TOP.A : num 2892 1181 1073 937 968 ...
# Keep only the rows in which every column was parsed successfully.
issizlik_orani <- na.omit(issizlik_orani)
# Summary statistics of the cleaned target series, stored and then printed.
summary_data <- skim(data)
print(summary_data)
## ── Data Summary ────────────────────────
## Values
## Name data
## Number of rows 121
## Number of columns 4
## _______________________
## Column type frequency:
## Date 1
## numeric 3
## ________________________
## Group variables None
##
## ── Variable type: Date ─────────────────────────────────────────────────────────
## skim_variable n_missing complete_rate min max median
## 1 Tarih 0 1 2014-01-01 2024-01-01 2019-01-01
## n_unique
## 1 121
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable n_missing complete_rate mean sd p0
## 1 TP.AKONUTSAT1.TOPLAM 0 1 113327. 30531. 42783
## 2 TP.YISGUCU2.G8 0 1 11.2 1.61 8.2
## 3 TP.TG2.Y17 0 1 12.2 1.97 7.25
## p25 p50 p75 p100 hist
## 1 95389 109548 123878 229357 ▁▇▅▁▁
## 2 10 10.7 12.5 15.1 ▂▇▃▃▂
## 3 10.8 12.0 13.3 19.3 ▁▇▇▂▁
I print the correlation matrix.
# library() stops with an error if GGally is missing, whereas require() would
# only return FALSE and let the next line fail with a less helpful message.
library(GGally)
# Pairwise scatterplot / correlation matrix of the three target series.
ggpairs(data)
I get the data from Google Trends; the file names are my search keywords.
# Read the three Google Trends exports; each CSV is named after its keyword.
issizlik_orani_trend <- read.csv(
  "/Users/serhatekli/Desktop/23-24\ spring/IE360/is_ilanlari.csv",
  header = TRUE, stringsAsFactors = FALSE, sep = ","
)
araba_alma_beklentisi_trend <- read.csv(
  "/Users/serhatekli/Desktop/23-24\ spring/IE360/araba_fiyatlari.csv",
  header = TRUE, stringsAsFactors = FALSE, sep = ","
)
konut_satis_trend <- read.csv(
  "/Users/serhatekli/Desktop/23-24\ spring/IE360/konut_kredisi.csv",
  header = TRUE, stringsAsFactors = FALSE, sep = ","
)
str(issizlik_orani_trend)
## 'data.frame': 124 obs. of 2 variables:
## $ Ay : chr "2014-01" "2014-02" "2014-03" "2014-04" ...
## $ iş.ilanları: int 45 46 40 40 38 45 44 46 51 47 ...
str(araba_alma_beklentisi_trend)
## 'data.frame': 124 obs. of 2 variables:
## $ Ay : chr "2014-01" "2014-02" "2014-03" "2014-04" ...
## $ araba.fiyatları: int 37 37 35 35 35 38 40 47 44 45 ...
str(konut_satis_trend)
## 'data.frame': 124 obs. of 2 variables:
## $ Ay : chr "2014-01" "2014-02" "2014-03" "2014-04" ...
## $ konut.kredisi: int 9 9 7 8 8 8 9 10 10 8 ...
# Parse the "YYYY-MM" month strings into Dates.
issizlik_orani_trend[["Ay"]] <- ym(issizlik_orani_trend[["Ay"]])
araba_alma_beklentisi_trend[["Ay"]] <- ym(araba_alma_beklentisi_trend[["Ay"]])
konut_satis_trend[["Ay"]] <- ym(konut_satis_trend[["Ay"]])
Then, I plot the time series one by one.
# Line plot of each target series, each followed by its Google Trends keyword.
# Columns are referenced by bare name inside aes(): `data$col` bypasses the
# data passed to ggplot() and puts "data$col" on the axis label.

# Unemployment rate and the "iş ilanları" search index.
ggplot(data, aes(x = Tarih, y = TP.YISGUCU2.G8)) +
  geom_line()
ggplot(issizlik_orani_trend, aes(x = Ay, y = iş.ilanları)) +
  geom_line()

# Total house sales and the "konut kredisi" search index.
ggplot(data, aes(x = Tarih, y = TP.AKONUTSAT1.TOPLAM)) +
  geom_line()
ggplot(konut_satis_trend, aes(x = Ay, y = konut.kredisi)) +
  geom_line()

# Car-purchase expectation and the "araba fiyatları" search index.
ggplot(data, aes(x = Tarih, y = TP.TG2.Y17)) +
  geom_line()
ggplot(araba_alma_beklentisi_trend, aes(x = Ay, y = araba.fiyatları)) +
  geom_line()
I merge the data and plot the correlation matrices.
# Rename the Google Trends month column so that all tables share the key
# "Tarih", then inner-join targets, search index and EVDS regressors on it.
names(issizlik_orani_trend)[names(issizlik_orani_trend) == "Ay"] <- "Tarih"
set <- list(data, issizlik_orani_trend, issizlik_orani)
issizlik_orani_data <- reduce(set, inner_join, by = "Tarih")
head(issizlik_orani_data)
## Tarih TP.AKONUTSAT1.TOPLAM TP.YISGUCU2.G8 TP.TG2.Y17 iş.ilanları
## 1 2014-01-01 87639 10.5 15.11225 45
## 2 2014-02-01 82597 10.5 11.43362 46
## 3 2014-03-01 87617 9.4 11.33385 40
## 4 2014-04-01 83610 8.8 12.54449 40
## 5 2014-05-01 90377 8.4 15.56560 38
## 6 2014-06-01 92936 9.0 11.88313 45
## TP.DK.USD.C.YTL TP.KAP2.TOP.A
## 1 1 2892
## 2 1 1181
## 3 1 1073
## 4 1 937
## 5 1 968
## 6 1 1120
# Pairwise scatterplot / correlation matrix of the merged unemployment data.
ggpairs(issizlik_orani_data)
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
# Same merge for house sales: align the key name, then inner-join on "Tarih".
names(konut_satis_trend)[names(konut_satis_trend) == "Ay"] <- "Tarih"
set <- list(data, konut_satis_trend, konut_satis)
konut_satis_data <- reduce(set, inner_join, by = "Tarih")
head(konut_satis_data)
## Tarih TP.AKONUTSAT1.TOPLAM TP.YISGUCU2.G8 TP.TG2.Y17 konut.kredisi
## 1 2014-01-01 87639 10.5 15.11225 9
## 2 2014-02-01 82597 10.5 11.43362 9
## 3 2014-03-01 87617 9.4 11.33385 7
## 4 2014-04-01 83610 8.8 12.54449 8
## 5 2014-05-01 90377 8.4 15.56560 8
## 6 2014-06-01 92936 9.0 11.88313 8
## TP.DK.EUR.C.YTL TP.KTF12
## 1 1.363009 11.3000
## 2 1.364635 13.0300
## 3 1.382348 13.5050
## 4 1.381114 13.5200
## 5 1.374815 12.9440
## 6 1.359233 12.2875
# Pairwise scatterplot / correlation matrix of the merged house-sales data.
ggpairs(konut_satis_data)
# Same merge for the car-purchase expectation: align the key name, then
# inner-join on "Tarih".
names(araba_alma_beklentisi_trend)[names(araba_alma_beklentisi_trend) == "Ay"] <- "Tarih"
set <- list(data, araba_alma_beklentisi_trend, araba_alma_beklentisi)
araba_alma_beklentisi_data <- reduce(set, inner_join, by = "Tarih")
head(araba_alma_beklentisi_data)
## Tarih TP.AKONUTSAT1.TOPLAM TP.YISGUCU2.G8 TP.TG2.Y17 araba.fiyatları
## 1 2014-01-01 87639 10.5 15.11225 37
## 2 2014-02-01 82597 10.5 11.43362 37
## 3 2014-03-01 87617 9.4 11.33385 35
## 4 2014-04-01 83610 8.8 12.54449 35
## 5 2014-05-01 90377 8.4 15.56560 35
## 6 2014-06-01 92936 9.0 11.88313 38
## TP.BRENTPETROL.EUBP TP.KTF11
## 1 108.16 12.8400
## 2 108.98 15.0925
## 3 105.95 15.2100
## 4 108.63 14.9100
## 5 109.21 14.2880
## 6 111.03 13.3050
# Pairwise scatterplot / correlation matrix of the merged car-purchase
# expectation data.
ggpairs(araba_alma_beklentisi_data)
I make the linear regression models.
# Scatter of each target series over time with a fitted linear trend line.
# Columns are referenced by bare name inside aes(): `data$col` bypasses the
# data passed to ggplot() and puts "data$col" on the axis label.
ggplot(data, aes(x = Tarih, y = TP.YISGUCU2.G8)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data, aes(x = Tarih, y = TP.AKONUTSAT1.TOPLAM)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data, aes(x = Tarih, y = TP.TG2.Y17)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
Then, I handle the data preprocessing and visualization tasks for the unemployment rate. I convert the dataset into a data table and add trend, year, and month columns. I also fit a linear regression model, check the residuals, and visualize the actual and predicted values over time.
# Convert the merged unemployment data to a data.table and add the
# time-based features used by the regression.
issizlik_orani_data <- as.data.table(issizlik_orani_data)
# Linear time index 1..N; seq_len() stays correct for an empty table, where
# 1:.N would produce c(1, 0).
issizlik_orani_data[, trnd := seq_len(.N)]
# NOTE(review): `yil` ("year") is filled with the month label, exactly like
# `ay` below, so the two columns are identical. It is kept unchanged here
# because replacing it with year(Tarih) would change any model that uses it.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
issizlik_orani_data[, yil := as.character(month(Tarih, label = TRUE))]
issizlik_orani_data[, ay := as.character(month(Tarih, label = TRUE))]
head(issizlik_orani_data)
## Tarih TP.AKONUTSAT1.TOPLAM TP.YISGUCU2.G8 TP.TG2.Y17 iş.ilanları
## <Date> <num> <num> <num> <int>
## 1: 2014-01-01 87639 10.5 15.11225 45
## 2: 2014-02-01 82597 10.5 11.43362 46
## 3: 2014-03-01 87617 9.4 11.33385 40
## 4: 2014-04-01 83610 8.8 12.54449 40
## 5: 2014-05-01 90377 8.4 15.56560 38
## 6: 2014-06-01 92936 9.0 11.88313 45
## TP.DK.USD.C.YTL TP.KAP2.TOP.A trnd yil ay
## <num> <num> <int> <char> <char>
## 1: 1 2892 1 Jan Jan
## 2: 1 1181 2 Feb Feb
## 3: 1 1073 3 Mar Mar
## 4: 1 937 4 Apr Apr
## 5: 1 968 5 May May
## 6: 1 1120 6 Jun Jun
# Unemployment rate against each explanatory variable, one panel per month,
# with a per-panel linear fit.
ggplot(issizlik_orani_data, aes(x = TP.YISGUCU2.G8, y = TP.DK.USD.C.YTL)) +
  geom_point() +
  geom_smooth(method = lm, linewidth = 3) +
  facet_wrap(~ay)
## `geom_smooth()` using formula = 'y ~ x'
ggplot(issizlik_orani_data, aes(x = TP.YISGUCU2.G8, y = TP.KAP2.TOP.A)) +
  geom_point() +
  geom_smooth(method = lm, linewidth = 3) +
  facet_wrap(~ay)
## `geom_smooth()` using formula = 'y ~ x'
# The search index is mapped by bare column name: a `df$col` vector inside
# aes() is taken from outside the layer data, which ggplot2 discourages and
# which is not guaranteed to stay aligned with the faceted rows.
ggplot(issizlik_orani_data, aes(x = TP.YISGUCU2.G8, y = iş.ilanları)) +
  geom_point() +
  geom_smooth(method = lm, linewidth = 3) +
  facet_wrap(~ay)
## `geom_smooth()` using formula = 'y ~ x'
# Linear model of the unemployment rate on a linear trend, month dummies, the
# "iş ilanları" search index and the two EVDS regressors.
# - The search index is referenced by column name instead of
#   `issizlik_orani_data$...`, so predict(lm_base, newdata) takes that term
#   from `newdata` rather than from the global table.
# - `yil` is left out: it is built with month() and therefore duplicates `ay`
#   exactly, so it only produced NA ("not defined because of singularities")
#   coefficients. The fitted values are unchanged.
# NOTE(review): TP.DK.USD.C.YTL also gets an NA coefficient in the printed
# fit (the column looks constant) - consider dropping or replacing it.
lm_base <- lm(
  TP.YISGUCU2.G8 ~ trnd + ay + iş.ilanları + TP.DK.USD.C.YTL + TP.KAP2.TOP.A,
  data = issizlik_orani_data
)
summary(lm_base)
##
## Call:
## lm(formula = TP.YISGUCU2.G8 ~ trnd + ay + yil + issizlik_orani_data$iş.ilanları +
## TP.DK.USD.C.YTL + TP.KAP2.TOP.A, data = issizlik_orani_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.28238 -0.70380 -0.02254 0.73210 3.09754
##
## Coefficients: (12 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.6213072 0.6450643 11.815 < 2e-16 ***
## trnd 0.0097916 0.0036032 2.717 0.00769 **
## ayAug -0.1849912 0.5427158 -0.341 0.73388
## ayDec 1.8120095 0.7229803 2.506 0.01372 *
## ayFeb 0.4819341 0.5455924 0.883 0.37906
## ayJan 1.4749051 0.5828770 2.530 0.01286 *
## ayJul -0.1410176 0.5538822 -0.255 0.79953
## ayJun -0.7027851 0.5449197 -1.290 0.19996
## ayMar 0.0540787 0.5355713 0.101 0.91976
## ayMay -0.4942270 0.5289426 -0.934 0.35224
## ayNov -0.2574525 0.5474848 -0.470 0.63915
## ayOct -0.3787927 0.5584239 -0.678 0.49904
## aySep -0.5233098 0.5624552 -0.930 0.35428
## yilAug NA NA NA NA
## yilDec NA NA NA NA
## yilFeb NA NA NA NA
## yilJan NA NA NA NA
## yilJul NA NA NA NA
## yilJun NA NA NA NA
## yilMar NA NA NA NA
## yilMay NA NA NA NA
## yilNov NA NA NA NA
## yilOct NA NA NA NA
## yilSep NA NA NA NA
## issizlik_orani_data$iş.ilanları 0.0633049 0.0089711 7.057 1.83e-10 ***
## TP.DK.USD.C.YTL NA NA NA NA
## TP.KAP2.TOP.A -0.0007394 0.0002201 -3.359 0.00109 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.183 on 106 degrees of freedom
## Multiple R-squared: 0.5223, Adjusted R-squared: 0.4592
## F-statistic: 8.279 on 14 and 106 DF, p-value: 1.003e-11
# Residual diagnostics for the unemployment model.
checkresiduals(residuals(lm_base))
##
## Ljung-Box test
##
## data: Residuals
## Q* = 352.3, df = 10, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 10
# Work on a copy so the := assignments below do not modify the model table.
tmp <- copy(issizlik_orani_data)
tmp[, actual := TP.YISGUCU2.G8]
tmp[, predicted_trend := predict(lm_base, newdata = tmp)]
## Warning in predict.lm(lm_base, tmp): prediction from a rank-deficient fit may
## be misleading
tmp[, residual_trend := actual - predicted_trend]
# head(tmp)
# Actual versus fitted unemployment rate over time.
ggplot(tmp, aes(x = Tarih)) +
  geom_line(aes(y = actual, color = "real")) +
  geom_line(aes(y = predicted_trend, color = "predicted"))
I do the same thing for the “expectation of buying a car in 12 months”.
# Convert the merged car-purchase expectation data to a data.table and add
# the time-based features used by the regression.
araba_alma_beklentisi_data <- as.data.table(araba_alma_beklentisi_data)
# Linear time index 1..N; seq_len() stays correct for an empty table, where
# 1:.N would produce c(1, 0).
araba_alma_beklentisi_data[, trnd := seq_len(.N)]
# NOTE(review): `yil` ("year") is filled with the month label, exactly like
# `ay` below, so the two columns are identical. It is kept unchanged here
# because replacing it with year(Tarih) would change any model that uses it.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
araba_alma_beklentisi_data[, yil := as.character(month(Tarih, label = TRUE))]
araba_alma_beklentisi_data[, ay := as.character(month(Tarih, label = TRUE))]
head(araba_alma_beklentisi_data)
## Tarih TP.AKONUTSAT1.TOPLAM TP.YISGUCU2.G8 TP.TG2.Y17 araba.fiyatları
## <Date> <num> <num> <num> <int>
## 1: 2014-01-01 87639 10.5 15.11225 37
## 2: 2014-02-01 82597 10.5 11.43362 37
## 3: 2014-03-01 87617 9.4 11.33385 35
## 4: 2014-04-01 83610 8.8 12.54449 35
## 5: 2014-05-01 90377 8.4 15.56560 35
## 6: 2014-06-01 92936 9.0 11.88313 38
## TP.BRENTPETROL.EUBP TP.KTF11 trnd yil ay
## <num> <num> <int> <char> <char>
## 1: 108.16 12.8400 1 Jan Jan
## 2: 108.98 15.0925 2 Feb Feb
## 3: 105.95 15.2100 3 Mar Mar
## 4: 108.63 14.9100 4 Apr Apr
## 5: 109.21 14.2880 5 May May
## 6: 111.03 13.3050 6 Jun Jun
# Car-purchase expectation against each explanatory variable, one panel per
# month, with a per-panel linear fit.
ggplot(araba_alma_beklentisi_data, aes(x = TP.TG2.Y17, y = TP.BRENTPETROL.EUBP)) +
  geom_point() +
  geom_smooth(method = lm, linewidth = 3) +
  facet_wrap(~ay)
## `geom_smooth()` using formula = 'y ~ x'
ggplot(araba_alma_beklentisi_data, aes(x = TP.TG2.Y17, y = TP.KTF11)) +
  geom_point() +
  geom_smooth(method = lm, linewidth = 3) +
  facet_wrap(~ay)
## `geom_smooth()` using formula = 'y ~ x'
# The search index is mapped by bare column name: a `df$col` vector inside
# aes() is taken from outside the layer data, which ggplot2 discourages and
# which is not guaranteed to stay aligned with the faceted rows.
ggplot(araba_alma_beklentisi_data, aes(x = TP.TG2.Y17, y = araba.fiyatları)) +
  geom_point() +
  geom_smooth(method = lm, linewidth = 3) +
  facet_wrap(~ay)
## `geom_smooth()` using formula = 'y ~ x'
# Linear model of the car-purchase expectation on a linear trend, month
# dummies, the "araba fiyatları" search index and the two EVDS regressors.
# - The search index is referenced by column name instead of
#   `araba_alma_beklentisi_data$...`, so predict(lm_base, newdata) takes that
#   term from `newdata` rather than from the global table.
# - `yil` is left out: it is built with month() and therefore duplicates `ay`
#   exactly, so it only produced NA ("not defined because of singularities")
#   coefficients. The fitted values are unchanged.
lm_base <- lm(
  TP.TG2.Y17 ~ trnd + ay + araba.fiyatları + TP.BRENTPETROL.EUBP + TP.KTF11,
  data = araba_alma_beklentisi_data
)
summary(lm_base)
##
## Call:
## lm(formula = TP.TG2.Y17 ~ trnd + ay + yil + araba_alma_beklentisi_data$araba.fiyatları +
## TP.BRENTPETROL.EUBP + TP.KTF11, data = araba_alma_beklentisi_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.7419 -1.1405 0.0202 1.2013 5.8523
##
## Coefficients: (11 not defined because of singularities)
## Estimate Std. Error t value
## (Intercept) 10.318199 1.279056 8.067
## trnd -0.003965 0.012203 -0.325
## ayAug -1.004931 0.961286 -1.045
## ayDec -1.490285 0.892941 -1.669
## ayFeb -1.259573 0.880405 -1.431
## ayJan -0.608744 0.865224 -0.704
## ayJul -1.018624 0.978677 -1.041
## ayJun -1.174528 0.921507 -1.275
## ayMar -0.997876 0.878337 -1.136
## ayMay -0.249748 0.878719 -0.284
## ayNov -0.865831 0.908642 -0.953
## ayOct -1.426653 0.892106 -1.599
## aySep -1.797173 0.909718 -1.976
## yilAug NA NA NA
## yilDec NA NA NA
## yilFeb NA NA NA
## yilJan NA NA NA
## yilJul NA NA NA
## yilJun NA NA NA
## yilMar NA NA NA
## yilMay NA NA NA
## yilNov NA NA NA
## yilOct NA NA NA
## yilSep NA NA NA
## araba_alma_beklentisi_data$araba.fiyatları 0.028539 0.024183 1.180
## TP.BRENTPETROL.EUBP 0.007547 0.009035 0.835
## TP.KTF11 0.046267 0.039499 1.171
## Pr(>|t|)
## (Intercept) 1.25e-12 ***
## trnd 0.7459
## ayAug 0.2982
## ayDec 0.0981 .
## ayFeb 0.1555
## ayJan 0.4833
## ayJul 0.3004
## ayJun 0.2053
## ayMar 0.2585
## ayMay 0.7768
## ayNov 0.3428
## ayOct 0.1128
## aySep 0.0508 .
## yilAug NA
## yilDec NA
## yilFeb NA
## yilJan NA
## yilJul NA
## yilJun NA
## yilMar NA
## yilMay NA
## yilNov NA
## yilOct NA
## yilSep NA
## araba_alma_beklentisi_data$araba.fiyatları 0.2406
## TP.BRENTPETROL.EUBP 0.4054
## TP.KTF11 0.2441
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.961 on 105 degrees of freedom
## Multiple R-squared: 0.1316, Adjusted R-squared: 0.007588
## F-statistic: 1.061 on 15 and 105 DF, p-value: 0.4012
# Residual diagnostics for the car-purchase expectation model.
checkresiduals(residuals(lm_base))
##
## Ljung-Box test
##
## data: Residuals
## Q* = 90.755, df = 10, p-value = 3.775e-15
##
## Model df: 0. Total lags used: 10
# Work on a copy so the := assignments below do not modify the model table.
tmp <- copy(araba_alma_beklentisi_data)
tmp[, actual := TP.TG2.Y17]
tmp[, predicted_trend := predict(lm_base, newdata = tmp)]
## Warning in predict.lm(lm_base, tmp): prediction from a rank-deficient fit may
## be misleading
tmp[, residual_trend := actual - predicted_trend]
# Actual versus fitted car-purchase expectation over time.
ggplot(tmp, aes(x = Tarih)) +
  geom_line(aes(y = actual, color = "real")) +
  geom_line(aes(y = predicted_trend, color = "predicted"))
And I do the same thing again for house sales.
# Convert the merged house-sales data to a data.table and add the time-based
# features used by the regression.
konut_satis_data <- as.data.table(konut_satis_data)
# Linear time index 1..N; seq_len() stays correct for an empty table, where
# 1:.N would produce c(1, 0).
konut_satis_data[, trnd := seq_len(.N)]
# NOTE(review): `yil` ("year") is filled with the month label, exactly like
# `ay` below, so the two columns are identical. It is kept unchanged here
# because replacing it with year(Tarih) would change any model that uses it.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
konut_satis_data[, yil := as.character(month(Tarih, label = TRUE))]
konut_satis_data[, ay := as.character(month(Tarih, label = TRUE))]
head(konut_satis_data)
## Tarih TP.AKONUTSAT1.TOPLAM TP.YISGUCU2.G8 TP.TG2.Y17 konut.kredisi
## <Date> <num> <num> <num> <int>
## 1: 2014-01-01 87639 10.5 15.11225 9
## 2: 2014-02-01 82597 10.5 11.43362 9
## 3: 2014-03-01 87617 9.4 11.33385 7
## 4: 2014-04-01 83610 8.8 12.54449 8
## 5: 2014-05-01 90377 8.4 15.56560 8
## 6: 2014-06-01 92936 9.0 11.88313 8
## TP.DK.EUR.C.YTL TP.KTF12 trnd yil ay
## <num> <num> <int> <char> <char>
## 1: 1.363009 11.3000 1 Jan Jan
## 2: 1.364635 13.0300 2 Feb Feb
## 3: 1.382348 13.5050 3 Mar Mar
## 4: 1.381114 13.5200 4 Apr Apr
## 5: 1.374815 12.9440 5 May May
## 6: 1.359233 12.2875 6 Jun Jun
# House sales against each explanatory variable, one panel per month, with a
# per-panel linear fit.
ggplot(konut_satis_data, aes(x = TP.AKONUTSAT1.TOPLAM, y = TP.DK.EUR.C.YTL)) +
  geom_point() +
  geom_smooth(method = lm, linewidth = 3) +
  facet_wrap(~ay)
## `geom_smooth()` using formula = 'y ~ x'
ggplot(konut_satis_data, aes(x = TP.AKONUTSAT1.TOPLAM, y = TP.KTF12)) +
  geom_point() +
  geom_smooth(method = lm, linewidth = 3) +
  facet_wrap(~ay)
## `geom_smooth()` using formula = 'y ~ x'
# The search index is mapped by bare column name: a `df$col` vector inside
# aes() is taken from outside the layer data, which ggplot2 discourages and
# which is not guaranteed to stay aligned with the faceted rows.
ggplot(konut_satis_data, aes(x = TP.AKONUTSAT1.TOPLAM, y = konut.kredisi)) +
  geom_point() +
  geom_smooth(method = lm, linewidth = 3) +
  facet_wrap(~ay)
## `geom_smooth()` using formula = 'y ~ x'
# Linear model of total house sales on a linear trend, month dummies, the
# "konut kredisi" search index and the two EVDS regressors.
# - The search index is referenced by column name instead of
#   `konut_satis_data$...`, so predict(lm_base, newdata) takes that term from
#   `newdata` rather than from the global table.
# - `yil` is left out: it is built with month() and therefore duplicates `ay`
#   exactly, so it only produced NA ("not defined because of singularities")
#   coefficients. The fitted values are unchanged.
lm_base <- lm(
  TP.AKONUTSAT1.TOPLAM ~ trnd + ay + konut.kredisi + TP.DK.EUR.C.YTL + TP.KTF12,
  data = konut_satis_data
)
summary(lm_base)
##
## Call:
## lm(formula = TP.AKONUTSAT1.TOPLAM ~ trnd + ay + yil + konut_satis_data$konut.kredisi +
## TP.DK.EUR.C.YTL + TP.KTF12, data = konut_satis_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -66227 -13163 1285 9895 86695
##
## Coefficients: (11 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 109468.38 37137.88 2.948 0.003946 **
## trnd 212.13 91.37 2.322 0.022181 *
## ayAug 18772.27 10434.57 1.799 0.074884 .
## ayDec 60849.94 10295.43 5.910 4.28e-08 ***
## ayFeb -6405.70 10244.95 -0.625 0.533161
## ayJan -9560.54 10133.18 -0.943 0.347597
## ayJul 15092.80 10243.00 1.473 0.143615
## ayJun 10110.54 10348.87 0.977 0.330829
## ayMar 12729.69 10197.44 1.248 0.214690
## ayMay -3470.31 10252.06 -0.338 0.735663
## ayNov 23601.37 10300.95 2.291 0.023950 *
## ayOct 24080.72 10293.76 2.339 0.021210 *
## aySep 24447.14 10326.52 2.367 0.019744 *
## yilAug NA NA NA NA
## yilDec NA NA NA NA
## yilFeb NA NA NA NA
## yilJan NA NA NA NA
## yilJul NA NA NA NA
## yilJun NA NA NA NA
## yilMar NA NA NA NA
## yilMay NA NA NA NA
## yilNov NA NA NA NA
## yilOct NA NA NA NA
## yilSep NA NA NA NA
## konut_satis_data$konut.kredisi 664.44 192.79 3.446 0.000818 ***
## TP.DK.EUR.C.YTL -8793.66 29794.87 -0.295 0.768470
## TP.KTF12 -1448.66 409.92 -3.534 0.000610 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22770 on 105 degrees of freedom
## Multiple R-squared: 0.5132, Adjusted R-squared: 0.4437
## F-statistic: 7.38 on 15 and 105 DF, p-value: 7.195e-11
# Residual diagnostics for the house-sales model.
checkresiduals(residuals(lm_base))
##
## Ljung-Box test
##
## data: Residuals
## Q* = 22.036, df = 10, p-value = 0.01492
##
## Model df: 0. Total lags used: 10
# Work on a copy so the := assignments below do not modify the model table.
tmp <- copy(konut_satis_data)
tmp[, actual := TP.AKONUTSAT1.TOPLAM]
tmp[, predicted_trend := predict(lm_base, newdata = tmp)]
## Warning in predict.lm(lm_base, tmp): prediction from a rank-deficient fit may
## be misleading
tmp[, residual_trend := actual - predicted_trend]
# Actual versus fitted house sales over time.
ggplot(tmp, aes(x = Tarih)) +
  geom_line(aes(y = actual, color = "real")) +
  geom_line(aes(y = predicted_trend, color = "predicted"))
In this homework, I tried to demonstrate my time series data manipulation and regression skills on data that I chose from https://evds2.tcmb.gov.tr/ and https://trends.google.com/trends/. From EVDS, I chose three series: the unemployment rate, people’s expectations of buying a car in the next 12 months, and total house sales in Turkey. For each of them, I chose two variables that may have an effect on it. In addition to those variables, I used Google Trends data for a relevant keyword and tried to predict each series.
First of all, I checked whether the time series I selected are correlated with each other. For this, I made a correlation matrix for the variable named “data”, which contains the time series that I selected. After plotting it with “ggpairs(data)”, I saw that their absolute correlation coefficients are all less than 0.5. Then, I loaded the “issizlik_orani”, “araba_alma_beklentisi”, and “konut_satis” data from EVDS and the csv files from Google Trends, and made some modifications in order to use them. Below, you can find the variables that I used for each time series.
issizlik_orani: “TP.DK.USD.C.YTL”, which is US dollars cross rate; “TP.KAP2.TOP.A”, which is the total number of companies closed; and “iş ilanları” keyword
araba_alma_beklentisi: “TP.BRENTPETROL.EUBP”, which is Europe Brent Spot Price FOB; “TP.KTF11”, which is vehicle loan interest rate; and “araba fiyatları” keyword
konut_satis: “TP.DK.EUR.C.YTL”, which is Euro cross rate; “TP.KTF12”, which is mortgage interest rate; and “konut kredisi” keyword
By looking at the consolidated correlation matrices, I found that the unemployment rate is correlated with the “iş ilanları” Google search. Also, the total number of companies closed is negatively correlated with the unemployment rate, which is not surprising; however, the correlation is not that strong, just -0.123.
Even though they are not significant, people’s expectations of buying a car in the next 12 months are positively correlated with the brent price, vehicle loan interest rate, and “araba fiyatları” Google search.
While house sales statistics is positively correlated with “konut kredisi” Google search, it is negatively correlated with Euro cross rate, and mortgage interest rate.
In this part, I handled the data preprocessing and visualization tasks for the unemployment rate, the expectation of buying a car, and house sales, using the other time series that I had. I converted each dataset into a data table and added trend, year, and month columns. I also fitted a linear model for each of them, checked the residuals and ACF plots to assess the adequacy of the regression models, applied the Ljung-Box test, and visualized the actual and predicted values over time.
For the unemployment rate data, I had an R-squared value of 0.5223, which means that 52% of the variation in the dependent variable can be explained by the independent variables. However, when I looked at the residuals, even though they may be regarded as normally distributed, I saw that there is a strong autocorrelation, which means that making predictions and inferences may be misleading.
For people’s expectations of buying a car in the next 12 months, the residual plots seemed okay, but I had a very low R-squared value. Also, when I looked at the predicted and real data, it was clear that the predicted and real values do not move together, so it is an unreliable model.
For the house sales data, I again had an R-squared value of 0.51, and there was no strong autocorrelation. When I looked at the plot of actual and predicted values, I saw that the predictions moved together with the actual data when there was an increase. Also, in the real values there was a sharp drop and rebound in 2020, which may be explained by the lockdown during the Covid-19 pandemic.
This homework gave me valuable insight into time series data manipulation and regression. Having been exposed to the R programming language during the homework, I learned how to prepare and visualize data. Also, by commenting on time series and forecasting, I believe that I have become more familiar with the lecture topics.